LOAD DATA

df <- readRDS("../data/models/social-risk-crash-rate-data.rds") %>%
    select(-geoid)
glimpse(df)
Rows: 13,518
Columns: 44
$ year                    <int> 2018, 2019, 2020, 2021, 2022, 2023, 2018, 2…
$ total_population        <int> 51349, 50967, 49194, 52308, 59590, 63478, 7…
$ pct_male_population     <dbl> 12.460865, 11.694881, 12.229106, 13.550457,…
$ pct_female_population   <dbl> 10.846132, 11.629654, 11.054169, 9.837846, …
$ pct_white_population    <dbl> 4.1225202, 3.6815086, 2.5856754, 2.3610065,…
$ pct_black_population    <dbl> 2.435429, 2.630197, 2.833673, 2.412624, 2.4…
$ pct_asian_population    <dbl> 0.19803330, 0.14388387, 0.23376782, 0.30587…
$ pct_hispanic_population <dbl> 6.411328, 6.607147, 5.982424, 6.079353, 5.2…
$ pct_foreign_born        <dbl> 2.909566, 2.442189, 2.187254, 2.200420, 3.0…
$ pct_age_under_18        <dbl> 2.130762, 2.643626, 2.333613, 2.387771, 1.7…
$ pct_age_18_34           <dbl> 1.715654, 1.653705, 1.882339, 1.963363, 2.0…
$ pct_age_35_64           <dbl> 3.296112, 3.079115, 3.041014, 3.043500, 3.3…
$ pct_age_65_plus         <dbl> 1.80895804, 1.78607845, 1.56929356, 1.57910…
$ median_income           <dbl> 58582.658, 49964.513, 68000.000, 70867.000,…
$ pct_income_under_25k    <dbl> 0.5445916, 0.5544325, 0.5325841, 0.5142597,…
$ pct_income_25k_75k      <dbl> 1.0568123, 1.1376418, 0.9533662, 0.8774915,…
$ pct_income_75k_plus     <dbl> 0.9273290, 0.8824877, 1.2379531, 1.2693994,…
$ pct_below_poverty       <dbl> 1.9574830, 1.9510653, 1.8091597, 1.9385106,…
$ median_gross_rent       <dbl> 1579.1133, 1524.3577, 1701.0000, 1740.0000,…
$ pct_owner_occupied      <dbl> 1.33318303, 1.35699756, 1.58439699, 1.46745…
$ pct_renter_occupied     <dbl> 1.2085934, 1.2305140, 1.1518859, 1.2057574,…
$ pct_no_vehicle          <dbl> 0.5122207, 0.6522735, 0.6118619, 0.6270527,…
$ pct_less_than_hs        <dbl> 1.4509748, 1.1951954, 1.1302166, 1.0495486,…
$ pct_hs_diploma          <dbl> 1.5576081, 1.4196542, 1.2704773, 1.3841042,…
$ pct_some_college        <dbl> 0.8473540, 0.8997538, 0.7256966, 0.9348439,…
$ pct_associates_degree   <dbl> 0.4912749, 0.3280552, 0.3923234, 0.3230851,…
$ pct_bachelors_degree    <dbl> 0.9482748, 0.9457966, 0.8537607, 0.9023442,…
$ pct_graduate_degree     <dbl> 0.3998749, 0.7098271, 1.1037907, 0.9673436,…
$ pct_in_labor_force      <dbl> 3.566504, 3.430191, 3.555304, 3.595995, 4.0…
$ pct_not_in_labor_force  <dbl> 3.448445, 3.326595, 3.162980, 3.106587, 2.8…
$ unemployment_rate       <dbl> 15.750133, 13.478747, 10.806175, 11.536417,…
$ pct_commute_short       <dbl> 0.7350082, 0.4604284, 0.1585556, 0.4167607,…
$ pct_commute_medium      <dbl> 1.7061331, 1.6882374, 1.2521824, 1.2579290,…
$ pct_commute_long        <dbl> 2.477320, 2.685832, 3.553271, 3.737464, 4.6…
$ pct_carpool             <dbl> 0.35798327, 0.31462606, 0.00000000, 0.04970…
$ pct_public_transit      <dbl> 2.056500, 2.540030, 2.898721, 2.366742, 2.8…
$ pct_walk                <dbl> 0.37702494, 0.30695226, 0.13009688, 0.76469…
$ pct_bike                <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000…
$ pct_work_from_home      <dbl> 0.01713750, 0.04604284, 0.04675356, 0.05161…
$ crash_rate_per_1000     <dbl> 0.8373993, 0.5886150, 0.6301567, 0.5735238,…
$ injury_rate_per_1000    <dbl> 0.2142184, 0.1569640, 0.2439316, 0.2294095,…
$ fatality_rate_per_1000  <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000…
$ pct_vehicle             <dbl> 2.0165122, 1.9222885, 2.1120415, 2.0340979,…
$ borough                 <chr> "Bronx", "Bronx", "Bronx", "Bronx", "Bronx"…

ADD PANDEMIC TEMPORAL FEATURE

df <- df %>%
  mutate(
    post_pandemic = ifelse(year < 2020, "pre", "post")  # pre vs. post 2020
  ) %>%
  mutate(post_pandemic = as.integer(post_pandemic == "post"))

glimpse(df)
Rows: 13,518
Columns: 45
$ year                    <int> 2018, 2019, 2020, 2021, 2022, 2023, 2018, 2…
$ total_population        <int> 51349, 50967, 49194, 52308, 59590, 63478, 7…
$ pct_male_population     <dbl> 12.460865, 11.694881, 12.229106, 13.550457,…
$ pct_female_population   <dbl> 10.846132, 11.629654, 11.054169, 9.837846, …
$ pct_white_population    <dbl> 4.1225202, 3.6815086, 2.5856754, 2.3610065,…
$ pct_black_population    <dbl> 2.435429, 2.630197, 2.833673, 2.412624, 2.4…
$ pct_asian_population    <dbl> 0.19803330, 0.14388387, 0.23376782, 0.30587…
$ pct_hispanic_population <dbl> 6.411328, 6.607147, 5.982424, 6.079353, 5.2…
$ pct_foreign_born        <dbl> 2.909566, 2.442189, 2.187254, 2.200420, 3.0…
$ pct_age_under_18        <dbl> 2.130762, 2.643626, 2.333613, 2.387771, 1.7…
$ pct_age_18_34           <dbl> 1.715654, 1.653705, 1.882339, 1.963363, 2.0…
$ pct_age_35_64           <dbl> 3.296112, 3.079115, 3.041014, 3.043500, 3.3…
$ pct_age_65_plus         <dbl> 1.80895804, 1.78607845, 1.56929356, 1.57910…
$ median_income           <dbl> 58582.658, 49964.513, 68000.000, 70867.000,…
$ pct_income_under_25k    <dbl> 0.5445916, 0.5544325, 0.5325841, 0.5142597,…
$ pct_income_25k_75k      <dbl> 1.0568123, 1.1376418, 0.9533662, 0.8774915,…
$ pct_income_75k_plus     <dbl> 0.9273290, 0.8824877, 1.2379531, 1.2693994,…
$ pct_below_poverty       <dbl> 1.9574830, 1.9510653, 1.8091597, 1.9385106,…
$ median_gross_rent       <dbl> 1579.1133, 1524.3577, 1701.0000, 1740.0000,…
$ pct_owner_occupied      <dbl> 1.33318303, 1.35699756, 1.58439699, 1.46745…
$ pct_renter_occupied     <dbl> 1.2085934, 1.2305140, 1.1518859, 1.2057574,…
$ pct_no_vehicle          <dbl> 0.5122207, 0.6522735, 0.6118619, 0.6270527,…
$ pct_less_than_hs        <dbl> 1.4509748, 1.1951954, 1.1302166, 1.0495486,…
$ pct_hs_diploma          <dbl> 1.5576081, 1.4196542, 1.2704773, 1.3841042,…
$ pct_some_college        <dbl> 0.8473540, 0.8997538, 0.7256966, 0.9348439,…
$ pct_associates_degree   <dbl> 0.4912749, 0.3280552, 0.3923234, 0.3230851,…
$ pct_bachelors_degree    <dbl> 0.9482748, 0.9457966, 0.8537607, 0.9023442,…
$ pct_graduate_degree     <dbl> 0.3998749, 0.7098271, 1.1037907, 0.9673436,…
$ pct_in_labor_force      <dbl> 3.566504, 3.430191, 3.555304, 3.595995, 4.0…
$ pct_not_in_labor_force  <dbl> 3.448445, 3.326595, 3.162980, 3.106587, 2.8…
$ unemployment_rate       <dbl> 15.750133, 13.478747, 10.806175, 11.536417,…
$ pct_commute_short       <dbl> 0.7350082, 0.4604284, 0.1585556, 0.4167607,…
$ pct_commute_medium      <dbl> 1.7061331, 1.6882374, 1.2521824, 1.2579290,…
$ pct_commute_long        <dbl> 2.477320, 2.685832, 3.553271, 3.737464, 4.6…
$ pct_carpool             <dbl> 0.35798327, 0.31462606, 0.00000000, 0.04970…
$ pct_public_transit      <dbl> 2.056500, 2.540030, 2.898721, 2.366742, 2.8…
$ pct_walk                <dbl> 0.37702494, 0.30695226, 0.13009688, 0.76469…
$ pct_bike                <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000…
$ pct_work_from_home      <dbl> 0.01713750, 0.04604284, 0.04675356, 0.05161…
$ crash_rate_per_1000     <dbl> 0.8373993, 0.5886150, 0.6301567, 0.5735238,…
$ injury_rate_per_1000    <dbl> 0.2142184, 0.1569640, 0.2439316, 0.2294095,…
$ fatality_rate_per_1000  <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000…
$ pct_vehicle             <dbl> 2.0165122, 1.9222885, 2.1120415, 2.0340979,…
$ borough                 <chr> "Bronx", "Bronx", "Bronx", "Bronx", "Bronx"…
$ post_pandemic           <int> 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1…

ADD INTERACTION FEATURES

df <- df %>%
  mutate(
    car_density_interaction = pct_vehicle * log1p(total_population),
    income_vehicle_interaction = median_income * pct_vehicle
  )

HOT-ENCODE YEAR

I will treat year as a factor and one-hot encode it.

df$year <- as.factor(df$year)
year_dummies <- model.matrix(~ year - 1, data = df)

df <- cbind(df[ , !(names(df) %in% c("year"))], year_dummies)

PREPARE TARGET

We will remove all possible target variables and keep only one per model training.

# Choose your target variable (e.g., crash rate per 1,000 residents)
target_var <- "crash_rate_per_1000"

# Remove all target variables except selected
cols_to_remove <- grep("per_1000", 
                       names(df), 
                       value = TRUE)
cols_to_remove <- setdiff(cols_to_remove, target_var) # keep this column

df <- df %>% select(-all_of(cols_to_remove),)

# Create feature matrix and target vector
X <- df %>% select(-target_var, -borough, -total_population)
y <- df[[target_var]]

HYPERPARAMETER TUNING

What This Does - Uses R’s xgboost::xgb.cv() to evaluate each parameter set. - Optuna (Python) handles the search space and Bayesian optimization. - The final best parameters are applied to fit the final_model. - Search space: Instead of predefined grids, trial$suggest_float() and trial$suggest_int() explore a range of values. - Best parameters: study$best_params holds the optimal hyperparameters.

## CONVERT TO DMATRIX
dtrain_all <- xgb.DMatrix(data = as.matrix(X), label = y)

## Start Python venv
reticulate::use_virtualenv("r-reticulate", required = TRUE)

## OPTUNA-BASED SPATIAL CV
optuna <- import("optuna")

boroughs <- unique(df$borough)
folds <- lapply(boroughs, function(b) which(df$borough != b))

# Optuna objective
objective <- function(trial) {
  params <- list(
    booster = "gbtree",
    eta = trial$suggest_float("eta", 0.01, 0.3, log = TRUE),
    max_depth = trial$suggest_int("max_depth", 3, 12),
    min_child_weight = trial$suggest_int("min_child_weight", 1, 10),
    subsample = trial$suggest_float("subsample", 0.5, 1.0),
    colsample_bytree = trial$suggest_float("colsample_bytree", 0.5, 1.0),
    gamma = trial$suggest_float("gamma", 0, 10),
    lambda = trial$suggest_float("lambda", 0, 10),
    alpha = trial$suggest_float("alpha", 0, 10)
  )
  
  rmse_scores <- numeric(length(folds))
  for (i in seq_along(folds)) {
    train_idx <- folds[[i]]
    valid_idx <- setdiff(seq_len(nrow(dtrain_all)), train_idx)

    dtrain <- xgb.DMatrix(data = as.matrix(X[train_idx, ]), label = y[train_idx])
    dvalid <- xgb.DMatrix(data = as.matrix(X[valid_idx, ]), label = y[valid_idx])

    model <- xgb.train(
      params = params,
      data = dtrain,
      nrounds = 500,
      watchlist = list(val = dvalid),
      early_stopping_rounds = 20,
      verbose = 0
    )

    rmse_scores[i] <- min(model$evaluation_log$val_rmse)
  }
  
  preds <- predict(model, as.matrix(X[valid_idx, ]))
  return(Metrics::rmse(y[valid_idx], preds))
}

# Run Optuna study
set.seed(2025)
study <- optuna$create_study(direction = "minimize")
study$optimize(objective, n_trials = 50)

best_params <- study$best_params
print(best_params)
$eta
[1] 0.1201187

$max_depth
[1] 5

$min_child_weight
[1] 8

$subsample
[1] 0.6164199

$colsample_bytree
[1] 0.6999922

$gamma
[1] 3.719088

$lambda
[1] 5.17341

$alpha
[1] 3.270779

TRAIN/TEST SPLIT

# Set seed
set.seed(2025)

# Split by index
train_index <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_index, ]
y_train <- y[train_index]
X_test <- X[-train_index, ]
y_test <- y[-train_index]

# Convert to xgb.DMatrix
dtrain <- xgb.DMatrix(data = as.matrix(X_train), label = y_train)
dtest <- xgb.DMatrix(data = as.matrix(X_test), label = y_test)

TRAIN MODEL

# Set seed
set.seed(2025)

# Training with parallel processing
final_model <- xgb.train(
  params = list(
    eta = best_params$eta,
    max_depth = best_params$max_depth,
    gamma = best_params$gamma,
    colsample_bytree = best_params$colsample_bytree,
    min_child_weight = best_params$min_child_weight,
    subsample = best_params$subsample,
    objective = "reg:squarederror",
    eval_metric = "rmse"
  ),
  data = dtrain,
  nrounds = 1000,
  watchlist = list(train = dtrain, test = dtest),
  early_stopping_rounds = 20,
  verbose = 1,
  nthread = detectCores() - 1
)
[1] train-rmse:1.959935 test-rmse:1.943103 
Multiple eval metrics are present. Will use test_rmse for early stopping.
Will train until test_rmse hasn't improved in 20 rounds.

[2] train-rmse:1.885766 test-rmse:1.870772 
[3] train-rmse:1.826508 test-rmse:1.818521 
[4] train-rmse:1.786537 test-rmse:1.782901 
[5] train-rmse:1.741575 test-rmse:1.746282 
[6] train-rmse:1.701737 test-rmse:1.715650 
[7] train-rmse:1.675409 test-rmse:1.692276 
[8] train-rmse:1.649850 test-rmse:1.670641 
[9] train-rmse:1.628597 test-rmse:1.661646 
[10]    train-rmse:1.606858 test-rmse:1.642966 
[11]    train-rmse:1.597939 test-rmse:1.636465 
[12]    train-rmse:1.578917 test-rmse:1.631226 
[13]    train-rmse:1.564573 test-rmse:1.621342 
[14]    train-rmse:1.549118 test-rmse:1.614985 
[15]    train-rmse:1.539681 test-rmse:1.610942 
[16]    train-rmse:1.523881 test-rmse:1.601895 
[17]    train-rmse:1.513709 test-rmse:1.593075 
[18]    train-rmse:1.507580 test-rmse:1.586184 
[19]    train-rmse:1.500953 test-rmse:1.586513 
[20]    train-rmse:1.493454 test-rmse:1.584122 
[21]    train-rmse:1.490005 test-rmse:1.581247 
[22]    train-rmse:1.478843 test-rmse:1.575141 
[23]    train-rmse:1.476069 test-rmse:1.573897 
[24]    train-rmse:1.470877 test-rmse:1.570920 
[25]    train-rmse:1.466444 test-rmse:1.570463 
[26]    train-rmse:1.459459 test-rmse:1.568340 
[27]    train-rmse:1.451448 test-rmse:1.567919 
[28]    train-rmse:1.447033 test-rmse:1.569194 
[29]    train-rmse:1.439085 test-rmse:1.566178 
[30]    train-rmse:1.435631 test-rmse:1.564914 
[31]    train-rmse:1.433212 test-rmse:1.564180 
[32]    train-rmse:1.427243 test-rmse:1.561112 
[33]    train-rmse:1.422343 test-rmse:1.560981 
[34]    train-rmse:1.418735 test-rmse:1.561259 
[35]    train-rmse:1.414467 test-rmse:1.559647 
[36]    train-rmse:1.409145 test-rmse:1.558684 
[37]    train-rmse:1.402428 test-rmse:1.556404 
[38]    train-rmse:1.397734 test-rmse:1.554198 
[39]    train-rmse:1.396167 test-rmse:1.553283 
[40]    train-rmse:1.391987 test-rmse:1.552270 
[41]    train-rmse:1.387588 test-rmse:1.552181 
[42]    train-rmse:1.384995 test-rmse:1.552003 
[43]    train-rmse:1.378946 test-rmse:1.550981 
[44]    train-rmse:1.375850 test-rmse:1.553506 
[45]    train-rmse:1.372297 test-rmse:1.553599 
[46]    train-rmse:1.368335 test-rmse:1.553950 
[47]    train-rmse:1.364855 test-rmse:1.551612 
[48]    train-rmse:1.359726 test-rmse:1.551580 
[49]    train-rmse:1.358148 test-rmse:1.550497 
[50]    train-rmse:1.354803 test-rmse:1.550000 
[51]    train-rmse:1.350247 test-rmse:1.550253 
[52]    train-rmse:1.347000 test-rmse:1.552812 
[53]    train-rmse:1.344932 test-rmse:1.551698 
[54]    train-rmse:1.340769 test-rmse:1.551204 
[55]    train-rmse:1.339520 test-rmse:1.551740 
[56]    train-rmse:1.335570 test-rmse:1.546184 
[57]    train-rmse:1.329665 test-rmse:1.550643 
[58]    train-rmse:1.326548 test-rmse:1.552036 
[59]    train-rmse:1.324028 test-rmse:1.549153 
[60]    train-rmse:1.320261 test-rmse:1.549236 
[61]    train-rmse:1.315158 test-rmse:1.547558 
[62]    train-rmse:1.310720 test-rmse:1.545829 
[63]    train-rmse:1.304922 test-rmse:1.545195 
[64]    train-rmse:1.298181 test-rmse:1.542381 
[65]    train-rmse:1.296212 test-rmse:1.542564 
[66]    train-rmse:1.292331 test-rmse:1.543948 
[67]    train-rmse:1.288435 test-rmse:1.543639 
[68]    train-rmse:1.286875 test-rmse:1.541403 
[69]    train-rmse:1.283858 test-rmse:1.540771 
[70]    train-rmse:1.278343 test-rmse:1.536453 
[71]    train-rmse:1.276273 test-rmse:1.535691 
[72]    train-rmse:1.273783 test-rmse:1.534360 
[73]    train-rmse:1.270261 test-rmse:1.532495 
[74]    train-rmse:1.266530 test-rmse:1.532205 
[75]    train-rmse:1.265671 test-rmse:1.531939 
[76]    train-rmse:1.263510 test-rmse:1.531678 
[77]    train-rmse:1.261402 test-rmse:1.532488 
[78]    train-rmse:1.259280 test-rmse:1.532332 
[79]    train-rmse:1.257066 test-rmse:1.534984 
[80]    train-rmse:1.254124 test-rmse:1.534008 
[81]    train-rmse:1.252545 test-rmse:1.533317 
[82]    train-rmse:1.246175 test-rmse:1.533971 
[83]    train-rmse:1.244708 test-rmse:1.532679 
[84]    train-rmse:1.238223 test-rmse:1.533592 
[85]    train-rmse:1.234989 test-rmse:1.531914 
[86]    train-rmse:1.232076 test-rmse:1.529619 
[87]    train-rmse:1.229449 test-rmse:1.528281 
[88]    train-rmse:1.227759 test-rmse:1.527017 
[89]    train-rmse:1.227075 test-rmse:1.528388 
[90]    train-rmse:1.224285 test-rmse:1.530681 
[91]    train-rmse:1.222168 test-rmse:1.530990 
[92]    train-rmse:1.216858 test-rmse:1.531737 
[93]    train-rmse:1.215050 test-rmse:1.531465 
[94]    train-rmse:1.214020 test-rmse:1.531490 
[95]    train-rmse:1.213599 test-rmse:1.530946 
[96]    train-rmse:1.211771 test-rmse:1.530869 
[97]    train-rmse:1.209781 test-rmse:1.532405 
[98]    train-rmse:1.208736 test-rmse:1.533464 
[99]    train-rmse:1.208342 test-rmse:1.533837 
[100]   train-rmse:1.203152 test-rmse:1.535616 
[101]   train-rmse:1.200668 test-rmse:1.535577 
[102]   train-rmse:1.198942 test-rmse:1.535120 
[103]   train-rmse:1.196072 test-rmse:1.536238 
[104]   train-rmse:1.192535 test-rmse:1.533738 
[105]   train-rmse:1.189005 test-rmse:1.531056 
[106]   train-rmse:1.187788 test-rmse:1.529239 
[107]   train-rmse:1.183801 test-rmse:1.528171 
[108]   train-rmse:1.183145 test-rmse:1.528352 
Stopping. Best iteration:
[88]    train-rmse:1.227759 test-rmse:1.527017

SAVE MODEL

# Create directory if it doesn't exist
if (!dir.exists("../data/models")) {
  dir.create("../data/models", recursive = TRUE)
}

# Save the final XGBoost model
saveRDS(final_model, file = "../data/models/spatial_cv_model.rds")

# Save the best parameters
saveRDS(best_params, file = "../data/models/spatial_cv_best_params.rds")

cat("Model and parameters saved to ../data/models/")
Model and parameters saved to ../data/models/

MODEL EVALUATION

library(Metrics)
library(ggplot2)
library(dplyr)

set.seed(2025)

# Predict on test set
preds <- predict(final_model, as.matrix(X_test))

# --- Metrics ---
rmse <- sqrt(mean((y_test - preds)^2))
mae <- mean(abs(y_test - preds))
mape <- mean(abs((y_test - preds) / y_test)) * 100
r2 <- 1 - (sum((y_test - preds)^2) / sum((y_test - mean(y_test))^2))

cat("Model Evaluation Metrics:\n")
cat("  RMSE:", rmse, "\n")
cat("  MAE :", mae, "\n")
cat("  MAPE:", mape, "%\n")
cat("  R²  :", r2, "\n\n")

# --- Residuals ---
residuals <- y_test - preds
residual_df <- data.frame(
  actual = y_test,
  predicted = preds,
  residuals = residuals
)

# --- Plot: Predicted vs Actual ---
p1 <- residual_df %>%
  ggplot(aes(x = actual, y = predicted)) +
  geom_point(alpha = 0.5) +
  geom_abline(slope = 1, intercept = 0, color = "red") +
  theme_minimal() +
  labs(title = "Predicted vs Actual Crash Rates",
       x = "Actual",
       y = "Predicted")

# --- Plot: Residuals vs Predicted ---
p2 <- residual_df %>%
  ggplot(aes(x = predicted, y = residuals)) +
  geom_point(alpha = 0.5, color = "blue") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  theme_minimal() +
  labs(title = "Residuals vs Predicted",
       x = "Predicted",
       y = "Residuals")

# --- Plot: Residual Density ---
p3 <- residual_df %>%
  ggplot(aes(x = residuals)) +
  geom_histogram(aes(y = ..density..), bins = 30, fill = "skyblue", alpha = 0.7) +
  geom_density(color = "red") +
  theme_minimal() +
  labs(title = "Residual Distribution",
       x = "Residuals",
       y = "Density")

# Print plots
print(p1)
print(p2)
print(p3)

SHAP EXPLAINABILITY

# Compute SHAP values
shap_values <- shap.values(xgb_model = final_model, X_train = as.matrix(X_train))
shap_long <- shap.prep(shap_contrib = shap_values$shap_score, X_train = as.matrix(X_train))

# SHAP summary plot
print(shap.plot.summary(shap_long))

if (!dir.exists("../report/plots")) {
  dir.create("../report/plots")
}

png("../report/plots/spatial_cv_shap_summary_plot.png", width = 1200, height = 800)
shap.plot.summary(shap_long)
dev.off()
quartz_off_screen 
                2 

INSPECT TREE ENSEMBLE

xgb.plot.tree(model = final_model, trees = 0)
xgb.plot.tree(model = final_model, trees = 1)
xgb.plot.tree(model = final_model, trees = 2)
xgb.plot.multi.trees(model = final_model)
# ============================================================
# Additional Model Diagnostics and Deeper Analysis
# ============================================================

library(ggplot2)
library(dplyr)
library(pdp)        # For Partial Dependence Plots
library(DALEX)      # For model explainability
library(ggthemes)
library(sf)

# ---------------------------
# 1. SHAP Dependence and Interaction Plots
# ---------------------------
message("\nGenerating SHAP dependence and interaction plots...")

# Assuming shap_values and shap_long are already computed
# (if not, recompute them using iml or SHAPforxgboost packages)

# Top feature by SHAP importance
top_feature <- shap_long %>%
  as_tibble() %>%
  count(variable, wt = abs(value), sort = TRUE) %>%
  dplyr::slice(1) %>%
  pull(variable)

# Dependence plot for top feature
shap.plot.dependence(data_long = shap_long, x = top_feature, color_feature = top_feature)


# Interaction values
shap_interaction_values <- predict(
  final_model,
  as.matrix(X_train),
  predinteraction = TRUE
)

# shap_interaction_values will be a 3D array: [n_samples, n_features, n_features]
dim(shap_interaction_values)
[1] 10816    48    48
# ---------------------------
# 2. Residual Plots and Mapping
# ---------------------------
message("\nComputing residuals and creating residual plots...")

preds <- predict(final_model, as.matrix(X_test))
residuals <- y_test - preds

residual_df <- data.frame(
  observed = y_test,
  predicted = preds,
  residual = residuals
)

# Predicted vs Observed
ggplot(residual_df, aes(x = predicted, y = observed)) +
  geom_point(alpha = 0.6) +
  geom_abline(slope = 1, intercept = 0, color = "red") +
  theme_minimal() +
  labs(title = "Predicted vs. Observed Crash Rates",
       x = "Predicted Crash Rate per 1,000",
       y = "Observed Crash Rate per 1,000")


# Residual Histogram
ggplot(residual_df, aes(x = residual)) +
  geom_histogram(binwidth = 0.2, fill = "steelblue", color = "white") +
  theme_minimal() +
  labs(title = "Residual Distribution", x = "Residuals", y = "Count")

# ---------------------------
# 3. Partial Dependence Plots (PDP)
# ---------------------------
message("\nGenerating Partial Dependence Plots...")

top_features <- shap_long %>%
  count(variable, wt = abs(value), sort = TRUE) %>%
  dplyr::slice(1:10) %>%
  pull(variable)

for (f in top_features) {
  pd <- partial(final_model, pred.var = f, train = as.matrix(X_train), grid.resolution = 30)
  plot(pd, main = paste("Partial Dependence of", f))
}

LS0tCnRpdGxlOiAiWEdCb29zdCBNb2RlbGluZyBSIE5vdGVib29rIgphdXRob3I6ICJBSiBTdHJhdW1hbi1TY290dCIKZGF0ZTogImByIFN5cy5EYXRlKClgIgpvdXRwdXQ6CiAgcGRmX2RvY3VtZW50OiBkZWZhdWx0CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAogIGh0bWxfZG9jdW1lbnQ6CiAgICBkZl9wcmludDogcGFnZWQKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoY2FyZXQpCmxpYnJhcnkoeGdib29zdCkKbGlicmFyeShTSEFQZm9yeGdib29zdCkKbGlicmFyeShNYXRyaXgpCmxpYnJhcnkocmV0aWN1bGF0ZSkKbGlicmFyeShkb1BhcmFsbGVsKQpgYGAKCiMjIExPQUQgREFUQQoKYGBge3IgbG9hZC1kYXRhfQpkZiA8LSByZWFkUkRTKCIuLi9kYXRhL21vZGVscy9zb2NpYWwtcmlzay1jcmFzaC1yYXRlLWRhdGEucmRzIikKZ2xpbXBzZShkZikKYGBgCgojIyBBREQgUEFOREVNSUMgVEVNUE9SQUwgRkVBVFVSRQoKYGBge3IgdGVtcG9yYWwtZmVhdHVyZXN9CmRmIDwtIGRmICU+JQogIG11dGF0ZSgKICAgIHBvc3RfcGFuZGVtaWMgPSBpZmVsc2UoeWVhciA8IDIwMjAsICJwcmUiLCAicG9zdCIpICAjIHByZSB2cy4gcG9zdCAyMDIwCiAgKSAlPiUKICBtdXRhdGUocG9zdF9wYW5kZW1pYyA9IGFzLmludGVnZXIocG9zdF9wYW5kZW1pYyA9PSAicG9zdCIpKQpgYGAKCiMjIEFERCBJTlRFUkFDVElPTiBGRUFUVVJFUwoKYGBge3IgaW50ZXJhY3Rpb25zfQpkZiA8LSBkZiAlPiUKICBtdXRhdGUoCiAgICBjYXJfZGVuc2l0eV9pbnRlcmFjdGlvbiA9IHBjdF92ZWhpY2xlICogbG9nMXAodG90YWxfcG9wdWxhdGlvbiksCiAgICBpbmNvbWVfdmVoaWNsZV9pbnRlcmFjdGlvbiA9IG1lZGlhbl9pbmNvbWUgKiBwY3RfdmVoaWNsZQogICkKYGBgCgojIyBIT1QtRU5DT0RFIGBZRUFSYAoKSSB3aWxsIHRyZWF0IGB5ZWFyYCBhcyBhIGZhY3RvciBhbmQgb25lLWhvdCBlbmNvZGUgaXQuCgpgYGB7ciBob3QtZW5jb2RlLXllYXJ9CmRmJHllYXIgPC0gYXMuZmFjdG9yKGRmJHllYXIpCnllYXJfZHVtbWllcyA8LSBtb2RlbC5tYXRyaXgofiB5ZWFyIC0gMSwgZGF0YSA9IGRmKQoKZGYgPC0gY2JpbmQoZGZbICwgIShuYW1lcyhkZikgJWluJSBjKCJ5ZWFyIikpXSwgeWVhcl9kdW1taWVzKQpgYGAKCiMjIFBSRVBBUkUgVEFSR0VUCgpXZSB3aWxsIHJlbW92ZSBhbGwgcG9zc2libGUgdGFyZ2V0IHZhcmlhYmxlcyBhbmQga2VlcCBvbmx5IG9uZSBwZXIgbW9kZWwgdHJhaW5pbmcuCgpgYGB7ciBzaW5nbGUtdGFyZ2V0LWRmfQojIENob29zZSB5b3VyIHRhcmdldCB2YXJpYWJsZSAoZS5nLiwgY3Jhc2ggcmF0ZSBwZXIgMSwwMDAgcmVzaWRlbnRzKQp0YXJnZXRfdmFyIDwtICJjcmFzaF9yYXRlX3Blcl8xMDAwIgoKIyBSZW1vdmUgYWxsIHRhcmdldCB2YXJpYWJsZXMgZXhjZXB0IHNlbGVjdGVkCmNvbHNfdG9fcmVtb3ZlIDwtIGdyZXAoInBlcl8xMDAwIiwgCiAgICAgICAgICAgICAgICAgICAgICAgbmFtZXMoZGYpLCAKICAgICAgICAgICAgICAgICAgICAgICB2YWx1ZSA9IFRSVUUpCmNvbHNfdG9fcmVtb3ZlIDwtIHNldGRpZmYoY29sc190b19yZW1vdmUsIHRhcmdldF92YXIpICMga2VlcCB0aGlzIGNvbHVtbgoKZGYgPC0gZGYgJT4lIHNlbGVjdCgtYWxsX29mKGNvbHNfdG9fcmVtb3ZlKSwpCgojIENyZWF0ZSBmZWF0dXJlIG1hdHJpeCBhbmQgdGFyZ2V0IHZlY3RvcgpYIDwtIGRmICU+JSBzZWxlY3QoLXRhcmdldF92YXIsIC1ib3JvdWdoLCAtdG90YWxfcG9wdWxhdGlvbiwgLWdlb2lkKQp5IDwtIGRmW1t0YXJnZXRfdmFyXV0KCmBgYAoKCiMjIEhZUEVSUEFSQU1FVEVSIFRVTklORwoKKipXaGF0IFRoaXMgRG9lcyoqCi0gVXNlcyAqKlLigJlzIGB4Z2Jvb3N0Ojp4Z2IuY3YoKWAqKiB0byBldmFsdWF0ZSBlYWNoIHBhcmFtZXRlciBzZXQuCi0gT3B0dW5hIChQeXRob24pIGhhbmRsZXMgdGhlIHNlYXJjaCBzcGFjZSBhbmQgQmF5ZXNpYW4gb3B0aW1pemF0aW9uLgotIFRoZSBmaW5hbCBiZXN0IHBhcmFtZXRlcnMgYXJlIGFwcGxpZWQgdG8gZml0IHRoZSBgZmluYWxfbW9kZWxgLgotICoqU2VhcmNoIHNwYWNlOioqIEluc3RlYWQgb2YgcHJlZGVmaW5lZCBncmlkcywgYHRyaWFsJHN1Z2dlc3RfZmxvYXQoKWAgYW5kIGB0cmlhbCRzdWdnZXN0X2ludCgpYCBleHBsb3JlIGEgcmFuZ2Ugb2YgdmFsdWVzLgotICoqQmVzdCBwYXJhbWV0ZXJzOioqIGBzdHVkeSRiZXN0X3BhcmFtc2AgaG9sZHMgdGhlIG9wdGltYWwgaHlwZXJwYXJhbWV0ZXJzLgoKYGBge3IgdHVuZS1wYXJhbXN9CiMjIENPTlZFUlQgVE8gRE1BVFJJWApkdHJhaW5fYWxsIDwtIHhnYi5ETWF0cml4KGRhdGEgPSBhcy5tYXRyaXgoWCksIGxhYmVsID0geSkKCiMjIFN0YXJ0IFB5dGhvbiB2ZW52CnJldGljdWxhdGU6OnVzZV92aXJ0dWFsZW52KCJyLXJldGljdWxhdGUiLCByZXF1aXJlZCA9IFRSVUUpCgojIyBPUFRVTkEtQkFTRUQgU1BBVElBTCBDVgpvcHR1bmEgPC0gaW1wb3J0KCJvcHR1bmEiKQoKYm9yb3VnaHMgPC0gdW5pcXVlKGRmJGJvcm91Z2gpCmZvbGRzIDwtIGxhcHBseShib3JvdWdocywgZnVuY3Rpb24oYikgd2hpY2goZGYkYm9yb3VnaCAhPSBiKSkKCiMgT3B0dW5hIG9iamVjdGl2ZQpvYmplY3RpdmUgPC0gZnVuY3Rpb24odHJpYWwpIHsKICBwYXJhbXMgPC0gbGlzdCgKICAgIGJvb3N0ZXIgPSAiZ2J0cmVlIiwKICAgIGV0YSA9IHRyaWFsJHN1Z2dlc3RfZmxvYXQoImV0YSIsIDAuMDEsIDAuMywgbG9nID0gVFJVRSksCiAgICBtYXhfZGVwdGggPSB0cmlhbCRzdWdnZXN0X2ludCgibWF4X2RlcHRoIiwgMywgMTIpLAogICAgbWluX2NoaWxkX3dlaWdodCA9IHRyaWFsJHN1Z2dlc3RfaW50KCJtaW5fY2hpbGRfd2VpZ2h0IiwgMSwgMTApLAogICAgc3Vic2FtcGxlID0gdHJpYWwkc3VnZ2VzdF9mbG9hdCgic3Vic2FtcGxlIiwgMC41LCAxLjApLAogICAgY29sc2FtcGxlX2J5dHJlZSA9IHRyaWFsJHN1Z2dlc3RfZmxvYXQoImNvbHNhbXBsZV9ieXRyZWUiLCAwLjUsIDEuMCksCiAgICBnYW1tYSA9IHRyaWFsJHN1Z2dlc3RfZmxvYXQoImdhbW1hIiwgMCwgMTApLAogICAgbGFtYmRhID0gdHJpYWwkc3VnZ2VzdF9mbG9hdCgibGFtYmRhIiwgMCwgMTApLAogICAgYWxwaGEgPSB0cmlhbCRzdWdnZXN0X2Zsb2F0KCJhbHBoYSIsIDAsIDEwKQogICkKICAKICBybXNlX3Njb3JlcyA8LSBudW1lcmljKGxlbmd0aChmb2xkcykpCiAgZm9yIChpIGluIHNlcV9hbG9uZyhmb2xkcykpIHsKICAgIHRyYWluX2lkeCA8LSBmb2xkc1tbaV1dCiAgICB2YWxpZF9pZHggPC0gc2V0ZGlmZihzZXFfbGVuKG5yb3coZHRyYWluX2FsbCkpLCB0cmFpbl9pZHgpCgogICAgZHRyYWluIDwtIHhnYi5ETWF0cml4KGRhdGEgPSBhcy5tYXRyaXgoWFt0cmFpbl9pZHgsIF0pLCBsYWJlbCA9IHlbdHJhaW5faWR4XSkKICAgIGR2YWxpZCA8LSB4Z2IuRE1hdHJpeChkYXRhID0gYXMubWF0cml4KFhbdmFsaWRfaWR4LCBdKSwgbGFiZWwgPSB5W3ZhbGlkX2lkeF0pCgogICAgbW9kZWwgPC0geGdiLnRyYWluKAogICAgICBwYXJhbXMgPSBwYXJhbXMsCiAgICAgIGRhdGEgPSBkdHJhaW4sCiAgICAgIG5yb3VuZHMgPSA1MDAsCiAgICAgIHdhdGNobGlzdCA9IGxpc3QodmFsID0gZHZhbGlkKSwKICAgICAgZWFybHlfc3RvcHBpbmdfcm91bmRzID0gMjAsCiAgICAgIHZlcmJvc2UgPSAwCiAgICApCgogICAgcm1zZV9zY29yZXNbaV0gPC0gbWluKG1vZGVsJGV2YWx1YXRpb25fbG9nJHZhbF9ybXNlKQogIH0KICAKICBwcmVkcyA8LSBwcmVkaWN0KG1vZGVsLCBhcy5tYXRyaXgoWFt2YWxpZF9pZHgsIF0pKQogIHJldHVybihNZXRyaWNzOjpybXNlKHlbdmFsaWRfaWR4XSwgcHJlZHMpKQp9CgojIFJ1biBPcHR1bmEgc3R1ZHkKc2V0LnNlZWQoMjAyNSkKc3R1ZHkgPC0gb3B0dW5hJGNyZWF0ZV9zdHVkeShkaXJlY3Rpb24gPSAibWluaW1pemUiKQpzdHVkeSRvcHRpbWl6ZShvYmplY3RpdmUsIG5fdHJpYWxzID0gNTApCgpiZXN0X3BhcmFtcyA8LSBzdHVkeSRiZXN0X3BhcmFtcwpwcmludChiZXN0X3BhcmFtcykKYGBgCgojIyBUUkFJTi9URVNUIFNQTElUCgpgYGB7ciB0cmFpbi10ZXN0LXNwbGl0fQojIFNldCBzZWVkCnNldC5zZWVkKDIwMjUpCgojIFNwbGl0IGJ5IGluZGV4CnRyYWluX2luZGV4IDwtIGNyZWF0ZURhdGFQYXJ0aXRpb24oeSwgcCA9IDAuOCwgbGlzdCA9IEZBTFNFKQpYX3RyYWluIDwtIFhbdHJhaW5faW5kZXgsIF0KeV90cmFpbiA8LSB5W3RyYWluX2luZGV4XQpYX3Rlc3QgPC0gWFstdHJhaW5faW5kZXgsIF0KeV90ZXN0IDwtIHlbLXRyYWluX2luZGV4XQoKIyBDb252ZXJ0IHRvIHhnYi5ETWF0cml4CmR0cmFpbiA8LSB4Z2IuRE1hdHJpeChkYXRhID0gYXMubWF0cml4KFhfdHJhaW4pLCBsYWJlbCA9IHlfdHJhaW4pCmR0ZXN0IDwtIHhnYi5ETWF0cml4KGRhdGEgPSBhcy5tYXRyaXgoWF90ZXN0KSwgbGFiZWwgPSB5X3Rlc3QpCmBgYAoKIyMgVFJBSU4gTU9ERUwKCmBgYHtyIHRyYWluLW1vZGVsfQojIFNldCBzZWVkCnNldC5zZWVkKDIwMjUpCgojIFRyYWluaW5nIHdpdGggcGFyYWxsZWwgcHJvY2Vzc2luZwpmaW5hbF9tb2RlbCA8LSB4Z2IudHJhaW4oCiAgcGFyYW1zID0gbGlzdCgKICAgIGV0YSA9IGJlc3RfcGFyYW1zJGV0YSwKICAgIG1heF9kZXB0aCA9IGJlc3RfcGFyYW1zJG1heF9kZXB0aCwKICAgIGdhbW1hID0gYmVzdF9wYXJhbXMkZ2FtbWEsCiAgICBjb2xzYW1wbGVfYnl0cmVlID0gYmVzdF9wYXJhbXMkY29sc2FtcGxlX2J5dHJlZSwKICAgIG1pbl9jaGlsZF93ZWlnaHQgPSBiZXN0X3BhcmFtcyRtaW5fY2hpbGRfd2VpZ2h0LAogICAgc3Vic2FtcGxlID0gYmVzdF9wYXJhbXMkc3Vic2FtcGxlLAogICAgb2JqZWN0aXZlID0gInJlZzpzcXVhcmVkZXJyb3IiLAogICAgZXZhbF9tZXRyaWMgPSAicm1zZSIKICApLAogIGRhdGEgPSBkdHJhaW4sCiAgbnJvdW5kcyA9IDEwMDAsCiAgd2F0Y2hsaXN0ID0gbGlzdCh0cmFpbiA9IGR0cmFpbiwgdGVzdCA9IGR0ZXN0KSwKICBlYXJseV9zdG9wcGluZ19yb3VuZHMgPSAyMCwKICB2ZXJib3NlID0gMSwKICBudGhyZWFkID0gZGV0ZWN0Q29yZXMoKSAtIDEKKQpgYGAKCiMjIFNBVkUgTU9ERUwKCmBgYHtyIHNhdmUtbW9kZWx9CiMgQ3JlYXRlIGRpcmVjdG9yeSBpZiBpdCBkb2Vzbid0IGV4aXN0CmlmICghZGlyLmV4aXN0cygiLi4vZGF0YS9tb2RlbHMiKSkgewogIGRpci5jcmVhdGUoIi4uL2RhdGEvbW9kZWxzIiwgcmVjdXJzaXZlID0gVFJVRSkKfQoKIyBTYXZlIHRoZSBmaW5hbCBYR0Jvb3N0IG1vZGVsCnNhdmVSRFMoZmluYWxfbW9kZWwsIGZpbGUgPSAiLi4vZGF0YS9tb2RlbHMvc3BhdGlhbF9jdl9tb2RlbC5yZHMiKQoKIyBTYXZlIHRoZSBiZXN0IHBhcmFtZXRlcnMKc2F2ZVJEUyhiZXN0X3BhcmFtcywgZmlsZSA9ICIuLi9kYXRhL21vZGVscy9zcGF0aWFsX2N2X2Jlc3RfcGFyYW1zLnJkcyIpCgpjYXQoIk1vZGVsIGFuZCBwYXJhbWV0ZXJzIHNhdmVkIHRvIC4uL2RhdGEvbW9kZWxzLyIpCmBgYAoKIyMgTU9ERUwgRVZBTFVBVElPTgoKYGBge3IgZXZhbC1tb2RlbCwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KbGlicmFyeShNZXRyaWNzKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZHBseXIpCgpzZXQuc2VlZCgyMDI1KQoKIyBQcmVkaWN0IG9uIHRlc3Qgc2V0CnByZWRzIDwtIHByZWRpY3QoZmluYWxfbW9kZWwsIGFzLm1hdHJpeChYX3Rlc3QpKQoKIyAtLS0gTWV0cmljcyAtLS0Kcm1zZSA8LSBzcXJ0KG1lYW4oKHlfdGVzdCAtIHByZWRzKV4yKSkKbWFlIDwtIG1lYW4oYWJzKHlfdGVzdCAtIHByZWRzKSkKbWFwZSA8LSBtZWFuKGFicygoeV90ZXN0IC0gcHJlZHMpIC8geV90ZXN0KSkgKiAxMDAKcjIgPC0gMSAtIChzdW0oKHlfdGVzdCAtIHByZWRzKV4yKSAvIHN1bSgoeV90ZXN0IC0gbWVhbih5X3Rlc3QpKV4yKSkKCmNhdCgiTW9kZWwgRXZhbHVhdGlvbiBNZXRyaWNzOlxuIikKY2F0KCIgIFJNU0U6Iiwgcm1zZSwgIlxuIikKY2F0KCIgIE1BRSA6IiwgbWFlLCAiXG4iKQpjYXQoIiAgTUFQRToiLCBtYXBlLCAiJVxuIikKY2F0KCIgIFLCsiAgOiIsIHIyLCAiXG5cbiIpCgojIC0tLSBSZXNpZHVhbHMgLS0tCnJlc2lkdWFscyA8LSB5X3Rlc3QgLSBwcmVkcwpyZXNpZHVhbF9kZiA8LSBkYXRhLmZyYW1lKAogIGFjdHVhbCA9IHlfdGVzdCwKICBwcmVkaWN0ZWQgPSBwcmVkcywKICByZXNpZHVhbHMgPSByZXNpZHVhbHMKKQoKIyAtLS0gUGxvdDogUHJlZGljdGVkIHZzIEFjdHVhbCAtLS0KcDEgPC0gcmVzaWR1YWxfZGYgJT4lCiAgZ2dwbG90KGFlcyh4ID0gYWN0dWFsLCB5ID0gcHJlZGljdGVkKSkgKwogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjUpICsKICBnZW9tX2FibGluZShzbG9wZSA9IDEsIGludGVyY2VwdCA9IDAsIGNvbG9yID0gInJlZCIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIGxhYnModGl0bGUgPSAiUHJlZGljdGVkIHZzIEFjdHVhbCBDcmFzaCBSYXRlcyIsCiAgICAgICB4ID0gIkFjdHVhbCIsCiAgICAgICB5ID0gIlByZWRpY3RlZCIpCgojIC0tLSBQbG90OiBSZXNpZHVhbHMgdnMgUHJlZGljdGVkIC0tLQpwMiA8LSByZXNpZHVhbF9kZiAlPiUKICBnZ3Bsb3QoYWVzKHggPSBwcmVkaWN0ZWQsIHkgPSByZXNpZHVhbHMpKSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuNSwgY29sb3IgPSAiYmx1ZSIpICsKICBnZW9tX2hsaW5lKHlpbnRlcmNlcHQgPSAwLCBsaW5ldHlwZSA9ICJkYXNoZWQiLCBjb2xvciA9ICJyZWQiKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICBsYWJzKHRpdGxlID0gIlJlc2lkdWFscyB2cyBQcmVkaWN0ZWQiLAogICAgICAgeCA9ICJQcmVkaWN0ZWQiLAogICAgICAgeSA9ICJSZXNpZHVhbHMiKQoKIyAtLS0gUGxvdDogUmVzaWR1YWwgRGVuc2l0eSAtLS0KcDMgPC0gcmVzaWR1YWxfZGYgJT4lCiAgZ2dwbG90KGFlcyh4ID0gcmVzaWR1YWxzKSkgKwogIGdlb21faGlzdG9ncmFtKGFlcyh5ID0gLi5kZW5zaXR5Li4pLCBiaW5zID0gMzAsIGZpbGwgPSAic2t5Ymx1ZSIsIGFscGhhID0gMC43KSArCiAgZ2VvbV9kZW5zaXR5KGNvbG9yID0gInJlZCIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIGxhYnModGl0bGUgPSAiUmVzaWR1YWwgRGlzdHJpYnV0aW9uIiwKICAgICAgIHggPSAiUmVzaWR1YWxzIiwKICAgICAgIHkgPSAiRGVuc2l0eSIpCgojIFByaW50IHBsb3RzCnByaW50KHAxKQpwcmludChwMikKcHJpbnQocDMpCmBgYAoKIyMgU0hBUCBFWFBMQUlOQUJJTElUWQoKYGBge3Igc2hhcH0KIyBDb21wdXRlIFNIQVAgdmFsdWVzCnNoYXBfdmFsdWVzIDwtIHNoYXAudmFsdWVzKHhnYl9tb2RlbCA9IGZpbmFsX21vZGVsLCBYX3RyYWluID0gYXMubWF0cml4KFhfdHJhaW4pKQpzaGFwX2xvbmcgPC0gc2hhcC5wcmVwKHNoYXBfY29udHJpYiA9IHNoYXBfdmFsdWVzJHNoYXBfc2NvcmUsIFhfdHJhaW4gPSBhcy5tYXRyaXgoWF90cmFpbikpCgojIFNIQVAgc3VtbWFyeSBwbG90CnByaW50KHNoYXAucGxvdC5zdW1tYXJ5KHNoYXBfbG9uZykpCgppZiAoIWRpci5leGlzdHMoIi4uL3JlcG9ydC9wbG90cyIpKSB7CiAgZGlyLmNyZWF0ZSgiLi4vcmVwb3J0L3Bsb3RzIikKfQoKcG5nKCIuLi9yZXBvcnQvcGxvdHMvc3BhdGlhbF9jdl9zaGFwX3N1bW1hcnlfcGxvdC5wbmciLCB3aWR0aCA9IDEyMDAsIGhlaWdodCA9IDgwMCkKc2hhcC5wbG90LnN1bW1hcnkoc2hhcF9sb25nKQpkZXYub2ZmKCkKYGBgCgojIyBJTlNQRUNUIFRSRUUgRU5TRU1CTEUKCmBgYHtyIGluc3BlY3QtdHJlZX0KeGdiLnBsb3QudHJlZShtb2RlbCA9IGZpbmFsX21vZGVsLCB0cmVlcyA9IDApCmBgYApgYGB7cn0KeGdiLnBsb3QudHJlZShtb2RlbCA9IGZpbmFsX21vZGVsLCB0cmVlcyA9IDEpCmBgYApgYGB7cn0KeGdiLnBsb3QudHJlZShtb2RlbCA9IGZpbmFsX21vZGVsLCB0cmVlcyA9IDIpCmBgYApgYGB7cn0KeGdiLnBsb3QubXVsdGkudHJlZXMobW9kZWwgPSBmaW5hbF9tb2RlbCkKYGBgCgpgYGB7cn0KIyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT0KIyBBZGRpdGlvbmFsIE1vZGVsIERpYWdub3N0aWNzIGFuZCBEZWVwZXIgQW5hbHlzaXMKIyA9PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT0KCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShkcGx5cikKbGlicmFyeShwZHApICAgICAgICAjIEZvciBQYXJ0aWFsIERlcGVuZGVuY2UgUGxvdHMKbGlicmFyeShEQUxFWCkgICAgICAjIEZvciBtb2RlbCBleHBsYWluYWJpbGl0eQpsaWJyYXJ5KGdndGhlbWVzKQpsaWJyYXJ5KHNmKQoKIyAtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KIyAxLiBTSEFQIERlcGVuZGVuY2UgYW5kIEludGVyYWN0aW9uIFBsb3RzCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCm1lc3NhZ2UoIlxuR2VuZXJhdGluZyBTSEFQIGRlcGVuZGVuY2UgYW5kIGludGVyYWN0aW9uIHBsb3RzLi4uIikKCiMgQXNzdW1pbmcgc2hhcF92YWx1ZXMgYW5kIHNoYXBfbG9uZyBhcmUgYWxyZWFkeSBjb21wdXRlZAojIChpZiBub3QsIHJlY29tcHV0ZSB0aGVtIHVzaW5nIGltbCBvciBTSEFQZm9yeGdib29zdCBwYWNrYWdlcykKCiMgVG9wIGZlYXR1cmUgYnkgU0hBUCBpbXBvcnRhbmNlCnRvcF9mZWF0dXJlIDwtIHNoYXBfbG9uZyAlPiUKICBhc190aWJibGUoKSAlPiUKICBjb3VudCh2YXJpYWJsZSwgd3QgPSBhYnModmFsdWUpLCBzb3J0ID0gVFJVRSkgJT4lCiAgZHBseXI6OnNsaWNlKDEpICU+JQogIHB1bGwodmFyaWFibGUpCgojIERlcGVuZGVuY2UgcGxvdCBmb3IgdG9wIGZlYXR1cmUKc2hhcC5wbG90LmRlcGVuZGVuY2UoZGF0YV9sb25nID0gc2hhcF9sb25nLCB4ID0gdG9wX2ZlYXR1cmUsIGNvbG9yX2ZlYXR1cmUgPSB0b3BfZmVhdHVyZSkKCiMgSW50ZXJhY3Rpb24gdmFsdWVzCnNoYXBfaW50ZXJhY3Rpb25fdmFsdWVzIDwtIHByZWRpY3QoCiAgZmluYWxfbW9kZWwsCiAgYXMubWF0cml4KFhfdHJhaW4pLAogIHByZWRpbnRlcmFjdGlvbiA9IFRSVUUKKQoKIyBzaGFwX2ludGVyYWN0aW9uX3ZhbHVlcyB3aWxsIGJlIGEgM0QgYXJyYXk6IFtuX3NhbXBsZXMsIG5fZmVhdHVyZXMsIG5fZmVhdHVyZXNdCmRpbShzaGFwX2ludGVyYWN0aW9uX3ZhbHVlcykKYGBgCmBgYHtyfQojIC0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQojIDIuIFJlc2lkdWFsIFBsb3RzIGFuZCBNYXBwaW5nCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCm1lc3NhZ2UoIlxuQ29tcHV0aW5nIHJlc2lkdWFscyBhbmQgY3JlYXRpbmcgcmVzaWR1YWwgcGxvdHMuLi4iKQoKcHJlZHMgPC0gcHJlZGljdChmaW5hbF9tb2RlbCwgYXMubWF0cml4KFhfdGVzdCkpCnJlc2lkdWFscyA8LSB5X3Rlc3QgLSBwcmVkcwoKcmVzaWR1YWxfZGYgPC0gZGF0YS5mcmFtZSgKICBvYnNlcnZlZCA9IHlfdGVzdCwKICBwcmVkaWN0ZWQgPSBwcmVkcywKICByZXNpZHVhbCA9IHJlc2lkdWFscwopCgojIFByZWRpY3RlZCB2cyBPYnNlcnZlZApnZ3Bsb3QocmVzaWR1YWxfZGYsIGFlcyh4ID0gcHJlZGljdGVkLCB5ID0gb2JzZXJ2ZWQpKSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuNikgKwogIGdlb21fYWJsaW5lKHNsb3BlID0gMSwgaW50ZXJjZXB0ID0gMCwgY29sb3IgPSAicmVkIikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgbGFicyh0aXRsZSA9ICJQcmVkaWN0ZWQgdnMuIE9ic2VydmVkIENyYXNoIFJhdGVzIiwKICAgICAgIHggPSAiUHJlZGljdGVkIENyYXNoIFJhdGUgcGVyIDEsMDAwIiwKICAgICAgIHkgPSAiT2JzZXJ2ZWQgQ3Jhc2ggUmF0ZSBwZXIgMSwwMDAiKQoKIyBSZXNpZHVhbCBIaXN0b2dyYW0KZ2dwbG90KHJlc2lkdWFsX2RmLCBhZXMoeCA9IHJlc2lkdWFsKSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMC4yLCBmaWxsID0gInN0ZWVsYmx1ZSIsIGNvbG9yID0gIndoaXRlIikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgbGFicyh0aXRsZSA9ICJSZXNpZHVhbCBEaXN0cmlidXRpb24iLCB4ID0gIlJlc2lkdWFscyIsIHkgPSAiQ291bnQiKQpgYGAKYGBge3J9CiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCiMgMy4gUGFydGlhbCBEZXBlbmRlbmNlIFBsb3RzIChQRFApCiMgLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tCm1lc3NhZ2UoIlxuR2VuZXJhdGluZyBQYXJ0aWFsIERlcGVuZGVuY2UgUGxvdHMuLi4iKQoKdG9wX2ZlYXR1cmVzIDwtIHNoYXBfbG9uZyAlPiUKICBjb3VudCh2YXJpYWJsZSwgd3QgPSBhYnModmFsdWUpLCBzb3J0ID0gVFJVRSkgJT4lCiAgZHBseXI6OnNsaWNlKDE6MTApICU+JQogIHB1bGwodmFyaWFibGUpCgpmb3IgKGYgaW4gdG9wX2ZlYXR1cmVzKSB7CiAgcGQgPC0gcGFydGlhbChmaW5hbF9tb2RlbCwgcHJlZC52YXIgPSBmLCB0cmFpbiA9IGFzLm1hdHJpeChYX3RyYWluKSwgZ3JpZC5yZXNvbHV0aW9uID0gMzApCiAgcGxvdChwZCwgbWFpbiA9IHBhc3RlKCJQYXJ0aWFsIERlcGVuZGVuY2Ugb2YiLCBmKSkKfQpgYGAK